import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

# XPath //div[@class='nav']//li[position()>2 and position()<last()-1] selects the nav-bar entries whose links should be crawled.
class CrawlBiqugeSpider(CrawlSpider):
    """Crawl www.biquge.info, starting from the home page navigation bar.

    The single active rule extracts links from the selected nav-bar ``<li>``
    entries and declares no callback.

    NOTE(review): ``parse_item`` is not referenced by any active rule; the
    only rule that names it as a callback is commented out below, so no
    items are produced until that rule (or an equivalent) is enabled.
    """

    name = 'crawl_biquge'
    allowed_domains = ['www.biquge.info']
    start_urls = ['http://www.biquge.info']

    rules = (
        # Nav-bar links only: the XPath skips the first two and the last two
        # <li> entries of the nav container.
        Rule(
            LinkExtractor(
                allow=r"\d_\d\.html",
                restrict_xpaths="//div[@class='nav']//li[position()>2 and position()<last()-1]",
            )
        ),
        # Disabled rule that would route links found under the element with
        # id="list" to parse_item:
        # Rule(LinkExtractor(allow=r'\d\.html', restrict_xpaths="//*[@id='list']//a"), callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        """Build an item dict from a page response.

        Args:
            response: The response whose body is queried with XPath.

        Returns:
            A dict with two keys:
            ``chapter_name`` - the first text node matched by ``//h1/text()``,
            or ``None`` when nothing matches;
            ``contents`` - the direct text nodes of ``div#content`` joined
            with newlines (an empty string when nothing matches).
        """
        # Log the URL through the spider logger rather than printing the
        # entire page body to stdout on every call.
        self.logger.debug("Parsing page: %s", response.url)

        contents = response.xpath("//div[@id='content']/text()").getall()
        return {
            'chapter_name': response.xpath('//h1/text()').get(),
            'contents': "\n".join(contents),
        }
